################################################################################
#
#                 Bringing Home the Bacon (World Politics)
#
#                               Kim & Fu  
#
#                              August 2024
#
################################################################################

library(tidyverse)
library(ggthemes)
library(lubridate)
library(stringr)
library(lfe)
library(stargazer)

################# PART 1: DATA PREPARATION #################

# set working directory
setwd("~/")

# 1. Load Ambassador Data
ambassador_raw <- read_csv("ambassador_us_30_covariates.csv")
ambassador_raw <- ambassador_raw %>% 
  mutate(year_start  = as.character(year_start),
         year_end    = as.character(year_end),
         month_start = as.character(month_start),
         month_end   = as.character(month_end)) %>%
  mutate(month_start = ifelse(nchar(month_start) == 1, paste0("0", month_start), month_start)) %>%
  mutate(month_end = ifelse(nchar(month_end) == 1, paste0("0", month_end), month_end)) %>%
  mutate(year_month_start = as.numeric(paste0(year_start, ".", month_start))) %>%
  mutate(year_month_end = as.numeric(paste0(year_end, ".", month_end)))

x <- read_csv("state_year_month.csv")
x1 <- x %>% mutate(country = "Canada") %>% select(country, state, year, month)
x2 <- x %>% mutate(country = "Mexico") %>% select(country, state, year, month)
x3 <- x %>% mutate(country = "China") %>% select(country, state, year, month)
x4 <- x %>% mutate(country = "Japan") %>% select(country, state, year, month)
x5 <- x %>% mutate(country = "United Kingdom") %>% select(country, state, year, month)
x6 <- x %>% mutate(country = "Germany") %>% select(country, state, year, month)
x7 <- x %>% mutate(country = "South Korea") %>% select(country, state, year, month)
x8 <- x %>% mutate(country = "Netherlands") %>% select(country, state, year, month)
x9 <- x %>% mutate(country = "Brazil") %>% select(country, state, year, month)
x10 <- x %>% mutate(country = "France") %>% select(country, state, year, month)
x11 <- x %>% mutate(country = "India") %>% select(country, state, year, month)
x12 <- x %>% mutate(country = "Belgium") %>% select(country, state, year, month)
x13 <- x %>% mutate(country = "Singapore") %>% select(country, state, year, month)
x14 <- x %>% mutate(country = "Australia") %>% select(country, state, year, month)
x15 <- x %>% mutate(country = "Switzerland") %>% select(country, state, year, month)
x16 <- x %>% mutate(country = "Italy") %>% select(country, state, year, month)
x17 <- x %>% mutate(country = "United Arab Emirates") %>% select(country, state, year, month)
x18 <- x %>% mutate(country = "Saudi Arabia") %>% select(country, state, year, month)
x19 <- x %>% mutate(country = "Malaysia") %>% select(country, state, year, month)
x20 <- x %>% mutate(country = "Israel") %>% select(country, state, year, month)
x21 <- x %>% mutate(country = "Colombia") %>% select(country, state, year, month)
x22 <- x %>% mutate(country = "Chile") %>% select(country, state, year, month)
x23 <- x %>% mutate(country = "Spain") %>% select(country, state, year, month)
x24 <- x %>% mutate(country = "Thailand") %>% select(country, state, year, month)
x25 <- x %>% mutate(country = "Turkey") %>% select(country, state, year, month)
x26 <- x %>% mutate(country = "Ireland") %>% select(country, state, year, month)
x27 <- x %>% mutate(country = "Venezuela") %>% select(country, state, year, month)
x28 <- x %>% mutate(country = "Philippines") %>% select(country, state, year, month)
x29 <- x %>% mutate(country = "Argentina") %>% select(country, state, year, month)
x30 <- x %>% mutate(country = "Dominican Republic") %>% select(country, state, year, month)


base <- rbind(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30)
rm(x, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30)

base1 <- base %>% 
  mutate(year = as.character(year), 
         month = as.character(month)) %>%
  mutate(month = ifelse(nchar(month) == 1, paste0("0", month), month)) %>%
  mutate(year_month = as.numeric(paste0(year, ".", month)))

base2 <- base1 %>%
  full_join(ambassador_raw) %>%
  filter(year_month >= year_month_start & year_month <= year_month_end) 

ambassador_merge <- base1 %>%
  left_join(base2) %>%
  select(-year_month, -year_month_end) 

rm(base, base1, base2)

nrow(ambassador_merge)

# 2. Load Export Data for the Top 30 US export partners
export <- read_csv("export.csv")

# Country-State-Industry-Time Level
export_all <- export %>%
  filter(!state %in% c("Puerto Rico", "US Virgin Islands", "Unknown")) %>%
  mutate(year = paste0("20", str_extract(month_year, regex("[0-9]+")))) %>%
  mutate(month = str_extract(month_year, regex("[A-Z][a-z]+"))) %>%
  mutate(month = case_when(month == "Jan" ~ "01", 
                           month == "Feb" ~ "02",
                           month == "Mar" ~ "03",
                           month == "Apr" ~ "04",
                           month == "May" ~ "05",
                           month == "Jun" ~ "06",
                           month == "Jul" ~ "07", 
                           month == "Aug" ~ "08",
                           month == "Sep" ~ "09",
                           month == "Oct" ~ "10",
                           month == "Nov" ~ "11",
                           month == "Dec" ~ "12")) %>%
  filter(year != "2021") %>%
  mutate(total_export_value = ifelse(is.na(total_export_value), 0, total_export_value)) %>%
  select(country, state, year, month, naics, total_export_value)

# Country-State-Time Level
export_cst <- export_all %>%
  group_by(country, state, year, month) %>%
  summarize(total_export_value = sum(total_export_value)) %>%
  ungroup()

export_ct <- export_cst %>%
  group_by(country, year, month) %>%
  summarize(total_export_ct = sum(total_export_value)) %>%
  ungroup()

# weights
export_s <- export_all %>%
  group_by(country, state) %>%
  summarize(total_export_cs = sum(total_export_value)) %>%
  mutate(max_export_cs = max(total_export_cs),
         min_export_cs = min(total_export_cs)) %>%
  mutate(wt_s = (total_export_cs - min_export_cs) / (max_export_cs - min_export_cs)) %>%
  select(country, state, wt_s)

export_sy <- export_all %>%
  group_by(country, state, year) %>%
  summarize(total_export_csy = sum(total_export_value)) %>%
  mutate(max_export_csy = max(total_export_csy),
         min_export_csy = min(total_export_csy)) %>%
  mutate(wt_sy = (total_export_csy - min_export_csy) / (max_export_csy - min_export_csy)) %>%
  select(country, state, year, wt_sy)

export_cs <- export_all %>%
  group_by(country, state) %>%
  summarize(total_export_cs = sum(total_export_value)) %>%
  ungroup() %>%
  mutate(max_export_cs = max(total_export_cs),
         min_export_cs = min(total_export_cs)) %>%
  mutate(wt_cs = (total_export_cs - min_export_cs) / (max_export_cs - min_export_cs)) %>%
  select(country, state, wt_cs)

export_csy <- export_all %>%
  group_by(country, state, year) %>%
  summarize(total_export_csy = sum(total_export_value)) %>%
  ungroup() %>%
  mutate(max_export_csy = max(total_export_csy),
         min_export_csy = min(total_export_csy)) %>%
  mutate(wt_csy = (total_export_csy - min_export_csy) / (max_export_csy - min_export_csy)) %>%
  select(country, state, year, wt_csy)

export_csi <- export_all %>%
  group_by(country, state, naics) %>%
  summarize(total_export_csi = sum(total_export_value)) %>%
  ungroup() %>%
  group_by(naics) %>%
  mutate(max_export_csi = max(total_export_csi),
         min_export_csi = min(total_export_csi)) %>%
  mutate(wt_csi = (total_export_csi - min_export_csi + 1) / (max_export_csi - min_export_csi + 1)) %>%
  select(country, state, naics, wt_csi)

export_c <- export_all %>%
  group_by(country) %>%
  summarize(total_export_c = sum(total_export_value)) %>%
  mutate(max_export_c = max(total_export_c),
         min_export_c = min(total_export_c)) %>%
  mutate(wt_c = (total_export_c - min_export_c) / (max_export_c - min_export_c)) %>%
  select(country, wt_c)

export_cy <- export_all %>%
  group_by(country, year) %>%
  summarize(total_export_cy = sum(total_export_value)) %>%
  mutate(max_export_cy = max(total_export_cy),
         min_export_cy = min(total_export_cy)) %>%
  mutate(wt_cy = (total_export_cy - min_export_cy) / (max_export_cy - min_export_cy)) %>%
  select(country, year, wt_cy)

rm(export, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30)

# 3. Co-variates
# state-month unemployment rate
unemp <- read_csv("state_month_unemployment.csv")

unemp <- unemp %>% 
  mutate(year = as.character(year),
         month = as.character(month)) %>%
  mutate(month = ifelse(str_count(month) == 1, paste0("0", month), month))


# 3-3.Destination country-related: annual GDP (unit: US billion) and population 
imf <- read.csv("gdp_imf.csv", as.is=T, na.strings = c("", NA))

## Extract gdp by country-year
imf_gdp <- imf %>%
  filter(Country %in% c("Canada", "Mexico", "China", "Japan", "United Kingdom",
                        "Germany", "South Korea", "Netherlands", "Brazil", "France",
                        "India", "Belgium", "Singapore", "Australia", "Switzerland",
                        "Italy", "United Arab Emirates", "Saudi Arabia", "Malaysia", "Israel", 
                        "Colombia", "Chile", "Spain", "Thailand", "Turkey", 
                        "Ireland", "Venezuela", "Philippines", "Argentina", "Dominican Republic"))%>%
  filter(Subject.Descriptor == "Gross domestic product, current prices" &
           Subject.Notes == "Values are based upon GDP in national currency converted to U.S. dollars using market exchange rates (yearly average). Exchange rate projections are provided by country economists for the group of other emerging market and developing countries. Exchanges rates for advanced economies are established in the WEO assumptions for each WEO exercise. Expenditure-based GDP is total final expenditures at purchasers' prices (including the f.o.b. value of exports of goods and services), less the f.o.b. value of imports of goods and services. [SNA 1993]")%>%
  select(-Subject.Notes, -Units, -Scale, -Country.Series.specific.Notes, -Subject.Descriptor) %>%
  gather(year, gdp_usdb, X2002:X2021)%>%
  mutate(Year = str_sub(year, 2)) %>% 
  select(-year) 

imf_gdp$gdp_usdb <- as.numeric(gsub(",","",imf_gdp$gdp_usdb))

imf_gdp <- imf_gdp %>% 
  mutate(gdp_logged = log(gdp_usdb*1000000000)) %>%
  rename(destin_gdp_usdb = gdp_usdb,
         destin_gdp_logged = gdp_logged,
         country = Country,
         year = Year)

## Extract population by country-year (million)
imf_population <- imf %>% 
  filter(Country %in% c("Canada", "Mexico", "China", "Japan", "United Kingdom",
                        "Germany", "South Korea", "Netherlands", "Brazil", "France",
                        "India", "Belgium", "Singapore", "Australia", "Switzerland",
                        "Italy", "United Arab Emirates", "Saudi Arabia", "Malaysia", "Israel", 
                        "Colombia", "Chile", "Spain", "Thailand", "Turkey", 
                        "Ireland", "Venezuela", "Philippines", "Argentina", "Dominican Republic"))%>%
  filter(Subject.Descriptor == "Population")%>%
  select(-Subject.Notes, -Units, -Scale, -Country.Series.specific.Notes, -Subject.Descriptor) %>%
  gather(year, destin_pop, X2002:X2021) %>%
  mutate(Year = str_sub(year, 2)) %>%
  select(-year)

imf_population$destin_pop <- as.numeric(gsub(",","",imf_population$destin_pop))

imf_population <- imf_population %>% 
  mutate(destin_pop_logged = log(destin_pop*1000000)) %>%
  rename(country = Country, 
         year = Year)


## country rank on export value
rank <- read_csv("us_export_ranking.csv")

rank <- rank %>%
  filter(country != "Taiwan") %>%
  filter(country != "Hong Kong") %>%
  arrange(desc(total_exp)) %>%
  mutate(rank = row_number()) %>%
  filter(rank <= 30) 


## core and swing states
voteshare0 <- read_csv("~/Dropbox/ambassodor/data/voteshare2000_2020.csv")


# Merge
export_ambassador <- export_all %>%
  mutate(log_export = log(total_export_value + 1)) %>%
  left_join(ambassador_merge) %>%
  mutate(ambassador_home_state = ifelse(state == home_state, 1, 0)) %>%
  mutate(ambassador_home_state = ifelse(is.na(ambassador_home_state), 0, ambassador_home_state)) %>%
  mutate(politician = ifelse(type == "politician", 1, 0)) %>%
  mutate(politician = ifelse(is.na(politician), 0, politician)) %>%
  mutate(diplomat = ifelse(type == "diplomat", 1, 0)) %>%
  mutate(diplomat = ifelse(is.na(diplomat), 0, diplomat)) %>%
  mutate(non_politician = ifelse(type == "non_politician", 1, 0)) %>%
  mutate(non_politician = ifelse(is.na(non_politician), 0, non_politician)) %>%
  mutate(type3 = type) %>%
  mutate(type3 = ifelse(is.na(type3), "vacancy", type3)) %>%
  mutate(type3 = factor(type3, levels = c("diplomat", "politician", "non_politician", "vacancy"))) %>%
  mutate(ambassador_home_stateXpolitician = ambassador_home_state * politician) %>%
  mutate(ambassador_home_stateXdiplomat = ambassador_home_state * diplomat) %>%
  mutate(ambassador_home_stateXnon_politician = ambassador_home_state * non_politician) %>%
  mutate(politician1 = ifelse(type_expanded1 == "politician", 1, 0)) %>%
  mutate(politician1 = ifelse(is.na(politician1), 0, politician1)) %>%
  mutate(diplomat1 = ifelse(type_expanded1 == "diplomat", 1, 0)) %>%
  mutate(diplomat1 = ifelse(is.na(type_expanded1), 0, diplomat1)) %>%
  mutate(non_politician1 = ifelse(type_expanded1 == "non_politician", 1, 0)) %>%
  mutate(non_politician1 = ifelse(is.na(type_expanded1), 0, non_politician1)) %>%
  mutate(ambassador_home_stateXpolitician1 = ambassador_home_state * politician1) %>%
  mutate(ambassador_home_stateXdiplomat1 = ambassador_home_state * diplomat1) %>%
  mutate(ambassador_home_stateXnon_politician1 = ambassador_home_state * non_politician1) %>%
  mutate(politician2 = ifelse(type_expanded2 == "politician", 1, 0)) %>%
  mutate(politician2 = ifelse(is.na(type_expanded2), 0, politician2)) %>%
  mutate(diplomat2 = ifelse(type_expanded2 == "diplomat", 1, 0)) %>%
  mutate(diplomat2 = ifelse(is.na(type_expanded2), 0, diplomat2)) %>%
  mutate(non_politician2 = ifelse(type_expanded2 == "non_politician", 1, 0)) %>%
  mutate(non_politician2 = ifelse(is.na(type_expanded2), 0, non_politician2)) %>%
  mutate(ambassador_home_stateXpolitician2 = ambassador_home_state * politician2) %>%
  mutate(ambassador_home_stateXdiplomat2 = ambassador_home_state * diplomat2) %>%
  mutate(ambassador_home_stateXnon_politician2 = ambassador_home_state * non_politician2) %>%
  mutate(vacant_month = ifelse(is.na(lastname), 1, 0)) %>%
  mutate(non_vacant_month = ifelse(vacant_month == 1, 0, 1)) %>%
  mutate(monthyear = paste0(month, "-", year)) %>%
  mutate(stateXsector = paste0(state, "X", naics)) %>%
  mutate(countryXstate = paste0(country, "X", state)) %>%
  mutate(countryXmonthyear = paste0(country, "X", monthyear)) %>%
  mutate(countryXstateXsector = paste(country, "X", state, "X", naics)) %>%
  mutate(sectorXmonthyear = paste0(naics, "X", monthyear)) %>%
  mutate(stateXmonthyear = paste0(state, "X", monthyear)) %>%
  left_join(rank) %>%
  left_join(export_csi)





# County-State-Month Level

export_ambassador_cst <- export_cst %>%
  mutate(log_export = log(total_export_value + 1)) %>%
  left_join(ambassador_merge) %>%
  mutate(ambassador_home_state = ifelse(state == home_state, 1, 0)) %>%
  mutate(ambassador_home_state = ifelse(is.na(ambassador_home_state), 0, ambassador_home_state)) %>%
  mutate(politician = ifelse(type == "politician", 1, 0)) %>%
  mutate(politician = ifelse(is.na(politician), 0, politician)) %>%
  mutate(diplomat = ifelse(type == "diplomat", 1, 0)) %>%
  mutate(diplomat = ifelse(is.na(diplomat), 0, diplomat)) %>%
  mutate(non_politician = ifelse(type == "non_politician", 1, 0)) %>%
  mutate(non_politician = ifelse(is.na(non_politician), 0, non_politician)) %>%
  mutate(type3 = type) %>%
  mutate(type3 = ifelse(is.na(type3), "vacancy", type3)) %>%
  mutate(type3 = factor(type3, levels = c("diplomat", "politician", "non_politician", "vacancy"))) %>%
  mutate(ambassador_home_stateXpolitician = ambassador_home_state * politician) %>%
  mutate(ambassador_home_stateXdiplomat = ambassador_home_state * diplomat) %>%
  mutate(ambassador_home_stateXnon_politician = ambassador_home_state * non_politician) %>%
  mutate(politician1 = ifelse(type_expanded1 == "politician", 1, 0)) %>%
  mutate(politician1 = ifelse(is.na(type_expanded1), 0, politician1)) %>%
  mutate(diplomat1 = ifelse(type_expanded1 == "diplomat", 1, 0)) %>%
  mutate(diplomat1 = ifelse(is.na(type_expanded1), 0, diplomat1)) %>%
  mutate(non_politician1 = ifelse(type_expanded1 == "non_politician", 1, 0)) %>%
  mutate(non_politician1 = ifelse(is.na(type_expanded1), 0, non_politician1)) %>%
  mutate(ambassador_home_stateXpolitician1 = ambassador_home_state * politician1) %>%
  mutate(ambassador_home_stateXdiplomat1 = ambassador_home_state * diplomat1) %>%
  mutate(ambassador_home_stateXnon_politician1 = ambassador_home_state * non_politician1) %>%
  mutate(politician2 = ifelse(type_expanded2 == "politician", 1, 0)) %>%
  mutate(politician2 = ifelse(is.na(politician2), 0, politician2)) %>%
  mutate(diplomat2 = ifelse(type_expanded2 == "diplomat", 1, 0)) %>%
  mutate(diplomat2 = ifelse(is.na(diplomat2), 0, diplomat2)) %>%
  mutate(non_politician2 = ifelse(type_expanded2 == "non_politician", 1, 0)) %>%
  mutate(non_politician2 = ifelse(is.na(non_politician2), 0, non_politician2)) %>%
  mutate(ambassador_home_stateXpolitician2 = ambassador_home_state * politician2) %>%
  mutate(ambassador_home_stateXdiplomat2 = ambassador_home_state * diplomat2) %>%
  mutate(ambassador_home_stateXnon_politician2 = ambassador_home_state * non_politician2) %>%
  mutate(vacant_month = ifelse(is.na(lastname), 1, 0)) %>%
  mutate(non_vacant_month = ifelse(vacant_month == 1, 0, 1)) %>%
  mutate(monthyear = paste0(month, "-", year)) %>%
  mutate(countryXstate = paste0(country, "X", state)) %>%
  mutate(countryXmonthyear = paste0(country, "X", monthyear)) %>%
  mutate(stateXmonthyear = paste0(state, "X", monthyear)) %>%
  left_join(unemp) %>%
  left_join(imf_gdp) %>%
  left_join(imf_population) %>%
  left_join(rank) %>%
  left_join(export_s) %>%
  left_join(export_sy) %>%
  left_join(export_cs) %>%
  left_join(export_csy) %>%
  left_join(export_ct) %>%
  mutate(export_pct_s = total_export_value / total_export_ct * 100) %>%
  mutate(president = case_when(year %in% c(2002:2008) ~ "Bush",
                               year %in% c(2009:2016) ~ "Obama",
                               year %in% c(2017:2020) ~ "Trump")) %>%
  mutate(countryXstateXpresident = paste0(country, "X", state, "X", president)) %>%
  mutate(countryXstateXyear = paste0(country, "X", state, "X", year)) %>%
  mutate(half = ifelse(month %in% c("01", "02", "03", "04", "05", "06"), "1", "2")) %>%
  mutate(year_half = paste0(year, "_", half)) %>%
  mutate(quarter = case_when(month %in% c("1", "2", "3") ~ "1",
                             month %in% c("4", "5", "6") ~ "2",
                             month %in% c("7", "8", "9") ~ "3",
                             month %in% c("10", "11", "12") ~ "4")) %>%
  mutate(year_quarter = paste0(year, "_", quarter)) %>%
  mutate(electionyear = ifelse(year %in% c(2002:2004), 2000,
                        ifelse(year %in% c(2005:2008), 2004,
                        ifelse(year %in% c(2009:2012), 2008,
                        ifelse(year %in% c(2013:2016), 2012,
                        ifelse(year %in% c(2017:2020), 2016, 2020)))))) %>%
  left_join(voteshare0) %>%
  mutate(corestate_non_vacant = corestate * non_vacant_month) %>%
  mutate(swingstate_non_vacant = swingstate * non_vacant_month) %>%
  mutate(black = ifelse(race == "black", 1, 0),
         hispanic = ifelse(race == "hispanic", 1, 0),
         asian = ifelse(race == "asian", 1, 0))


# County-Month Level
export_ambassador_ct <- export_ambassador_cst %>%
  group_by(country, year, month, monthyear, diplomat, politician, non_politician, age, female, race, destin_gdp_logged, destin_pop_logged) %>%
  summarize(total_export_value = sum(total_export_value)) %>%
  mutate(log_export = log(total_export_value + 1)) %>%
  left_join(export_cy, by = c("country", "year")) %>%
  left_join(export_c, by = c("country"))

rm(export_all, export_cs, export_cst, export_ct, imf, imf_gdp, imf_population, rank, unemp, export_c, export_csi, export_csy, export_cy, export_s, export_sy)


# Export Top30 Destinations
top30exp <- read_csv("top30exp.csv")
exp30 <- read_csv("exp30.csv")


save(export_ambassador, export_ambassador_cst, export_ambassador_ct, ambassador_raw, exp30, top30exp, upstreamness, file = "export_ambassador.RData")
